Getting Started

Resources

Exercise 1 - Rank the categories

How did you do?

Exercise 2 - Distance - common base

How did you do?

Exercise 3 - Area and Volume

How did you do?

Exercise 4 - Length

How did you do?

That's why stacked bar charts are never recommended

Finally colour, hue, saturation, density

Finally colour, hue, saturation, density (cont…)

Finally colour, hue, saturation, density (cont…)

Finally colour, hue, saturation, density (cont…)

## End of principles session…

# Close the principles session with an encouraging message
praise()
## [1] "You are magnificent!"

Can we apply Cleveland's and Robbins' principles to multidimensional data?

Make the data stand out. Avoid superfluity

Use visually prominent graphical elements to show the data

Do not clutter the data region

Do not overdo the number of tick marks

Use a reference line when there is an important value that must be seen across the entire graph, but do not let that line interfere with the data

Overlapping plotting symbols must be visually distinguishable

Superimposed datasets must be readily visually discriminated

Visual clarity must be preserved under reduction and reproduction

Accessing the NIWA climate database

public_user <- cf_user() # With no arguments this defaults to the "public" user

# Alternative: pick the datatype interactively through menus
# out <- cf_datatype()

or programmatically

# Surface wind (the printed summary further down lists it as 9am wind in m/s)
wind_dt <- cf_datatype(select_1 = 2, select_2 = 1, check_box = 4, combo_box = 1)

# Daily rain
rain_dt <- cf_datatype(select_1 = 3, select_2 = 1, check_box = 1)

# Daily temperature extremes
temp_dt <- cf_datatype(select_1 = 4, select_2 = 2, check_box = 1)

# Combine the three datatypes into one object and print it
all_dts <- wind_dt + rain_dt + temp_dt
all_dts
##                      dt.name              dt.type    dt.options dt.combo
## dt1                     Wind         Surface wind     [9amWind]      m/s
## dt2            Precipitation Rain (fixed periods)      [Daily ]         
## dt3 Temperature and Humidity         Max_min_temp [DailyMaxMin]
# Station to query. cf_station() is called with no arguments, which yields
# the Reefton Ews station shown in the printed output.
reefton_st <- cf_station()
reefton_st
##           name network agent      start        end open distance       lat
## 1) Reefton Ews  F21182  3925 1960-08-01 2017-05-25 TRUE        0 -42.11578
##         lon
## 1) 171.8601

Reefton Data

# Query CliFlo for all three datatypes at the Reefton station.
# The start date is 333 days before today; the trailing "9" is presumably the
# hour component expected by cf_query()'s date format -- TODO confirm.
reefton_data <- cf_query(
  user = public_user,
  datatype = all_dts,
  station = reefton_st,
  start_date = paste(as.Date(Sys.time()) - 333, "9")
)
## connecting to CliFlo...
## reading data...
## UserName is = public
## Number of charged rows output = 0
## Number of free rows output = 1000
## Total number of rows output = 1000
## Note: The end date was revised to meet the maximum number of rows allowed per query [1000]
## or due to running out of rows in your subscription. Also, one or more datatypes may have been disabled due to the above.
## Copyright NIWA 2017 Subject to NIWA's Terms and Conditions
## See: http://cliflo.niwa.co.nz/pls/niwp/doc/terms.html
## Comments to: cliflo@niwa.co.nz
# Inspect the structure of the object returned by the query
str(reefton_data)
## Formal class 'cfDataList' [package "clifro"] with 1 slot
##   ..@ .Data:List of 3
##   .. ..$ :Formal class 'cfWind' [package "clifro"] with 6 slots
##   .. .. .. ..@ .Data     :List of 8
##   .. .. .. .. ..$ : Factor w/ 1 level "Reefton Ews": 1 1 1 1 1 1 1 1 1 1 ...
##   .. .. .. .. ..$ : Factor w/ 334 levels "20160625:0900",..: 1 2 3 4 5 6 7 8 9 10 ...
##   .. .. .. .. ..$ : int [1:334] 109 193 7 117 109 180 315 283 254 129 ...
##   .. .. .. .. ..$ : num [1:334] 2.6 0.8 0.4 1.2 2.1 1 0.4 0.4 0.4 0.6 ...
##   .. .. .. .. ..$ : num [1:334] 17 85 75 17 44 52 0 45 57 59 ...
##   .. .. .. .. ..$ : num [1:334] 0.7 0.4 0.3 0.5 1 0.5 0.3 0.4 0.3 0.4 ...
##   .. .. .. .. ..$ : int [1:334] 1 1 1 1 1 1 1 1 1 1 ...
##   .. .. .. .. ..$ : Factor w/ 1 level "H": 1 1 1 1 1 1 1 1 1 1 ...
##   .. .. .. ..@ data_label: chr " 9am only surface wind (m/s)"
##   .. .. .. ..@ dt_name   : chr "Surface Wind"
##   .. .. .. ..@ dt_type   : chr " 9am only"
##   .. .. .. ..@ names     : chr [1:8] "Station" "Date(local)" "Dir(DegT)" "Speed(m/s)" ...
##   .. .. .. ..@ row.names : chr [1:334] "1" "2" "3" "4" ...
##   .. ..$ :Formal class 'cfRain' [package "clifro"] with 6 slots
##   .. .. .. ..@ .Data     :List of 8
##   .. .. .. .. ..$ : Factor w/ 1 level "Reefton Ews": 1 1 1 1 1 1 1 1 1 1 ...
##   .. .. .. .. ..$ : Factor w/ 334 levels "20160625:0900",..: 1 2 3 4 5 6 7 8 9 10 ...
##   .. .. .. .. ..$ : num [1:334] 1.6 5.6 12.2 0.2 0.4 0 0 0.2 0.2 0 ...
##   .. .. .. .. ..$ : logi [1:334] NA NA NA NA NA NA ...
##   .. .. .. .. ..$ : num [1:334] 0 0 0 0 0 0.2 0.4 0.6 0.7 1 ...
##   .. .. .. .. ..$ : num [1:334] 1.4 5.4 12 0 0.1 0 0 0 0 0 ...
##   .. .. .. .. ..$ : int [1:334] 24 24 24 24 24 24 24 24 24 24 ...
##   .. .. .. .. ..$ : Factor w/ 1 level "D": 1 1 1 1 1 1 1 1 1 1 ...
##   .. .. .. ..@ data_label: chr " Daily Rain (mm)"
##   .. .. .. ..@ dt_name   : chr "Rain"
##   .. .. .. ..@ dt_type   : chr " Daily"
##   .. .. .. ..@ names     : chr [1:8] "Station" "Date(local)" "Amount(mm)" "SofG" ...
##   .. .. .. ..@ row.names : chr [1:334] "1" "2" "3" "4" ...
##   .. ..$ :Formal class 'cfTemp' [package "clifro"] with 7 slots
##   .. .. .. ..@ .Data     :List of 12
##   .. .. .. .. ..$ : Factor w/ 1 level "Reefton Ews": 1 1 1 1 1 1 1 1 1 1 ...
##   .. .. .. .. ..$ : Factor w/ 332 levels "20160625:0900",..: 1 2 3 4 5 6 7 8 9 10 ...
##   .. .. .. .. ..$ : num [1:332] 13.1 11.8 14.2 14.5 13.8 10.5 14.5 9.8 11.6 9.6 ...
##   .. .. .. .. ..$ : int [1:332] 24 24 24 24 24 24 24 24 24 24 ...
##   .. .. .. .. ..$ : num [1:332] 6.2 5.9 6.2 7.7 5.3 1.5 -3.2 -2.9 1.4 2.1 ...
##   .. .. .. .. ..$ : int [1:332] 24 24 24 24 24 24 24 24 24 24 ...
##   .. .. .. .. ..$ : num [1:332] 4 3.1 5.6 3 1.6 -0.8 -6.6 -2.7 -0.8 2.8 ...
##   .. .. .. .. ..$ : int [1:332] 24 24 24 24 24 24 24 24 24 24 ...
##   .. .. .. .. ..$ : logi [1:332] NA NA NA NA NA NA ...
##   .. .. .. .. ..$ : logi [1:332] NA NA NA NA NA NA ...
##   .. .. .. .. ..$ : logi [1:332] NA NA NA NA NA NA ...
##   .. .. .. .. ..$ : Factor w/ 1 level "D": 1 1 1 1 1 1 1 1 1 1 ...
##   .. .. .. ..@ data_label: chr " Daily maximum/minimum temperature"
##   .. .. .. ..@ plot_label: language " Daily" ~ temperature ~ (degree * C)
##   .. .. .. ..@ dt_name   : chr "Max_min"
##   .. .. .. ..@ dt_type   : chr " Daily"
##   .. .. .. ..@ names     : chr [1:12] "Station" "Date(local)" "Tmax(C)" "Period(Hrs)" ...
##   .. .. .. ..@ row.names : chr [1:332] "1" "2" "3" "4" ...

From list to a dataframe

# Apply `[` to every element of the query result and row-bind the pieces
# into a single data frame
reefton_data_df <- map_df(reefton_data, `[`)
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
head(reefton_data_df)
##       Station          Date.local Dir.DegT Speed.ms Dir StdDev Spd StdDev
## 1 Reefton Ews 2016-06-25 09:00:00      109      2.6         17        0.7
## 2 Reefton Ews 2016-06-26 09:00:00      193      0.8         85        0.4
## 3 Reefton Ews 2016-06-27 09:00:00        7      0.4         75        0.3
## 4 Reefton Ews 2016-06-28 09:00:00      117      1.2         17        0.5
## 5 Reefton Ews 2016-06-29 09:00:00      109      2.1         44        1.0
## 6 Reefton Ews 2016-06-30 09:00:00      180      1.0         52        0.5
##   Period.Hrs Freq Amount.mm SofG Deficit.mm Runoff.mm Tmax.C Tmin.C
## 1          1    H        NA   NA         NA        NA     NA     NA
## 2          1    H        NA   NA         NA        NA     NA     NA
## 3          1    H        NA   NA         NA        NA     NA     NA
## 4          1    H        NA   NA         NA        NA     NA     NA
## 5          1    H        NA   NA         NA        NA     NA     NA
## 6          1    H        NA   NA         NA        NA     NA     NA
##   Period.Hrs.1 Tgmin.C Period.Hrs.2 Tmean.C RHmean.% Period.Hrs.3
## 1           NA      NA           NA      NA       NA           NA
## 2           NA      NA           NA      NA       NA           NA
## 3           NA      NA           NA      NA       NA           NA
## 4           NA      NA           NA      NA       NA           NA
## 5           NA      NA           NA      NA       NA           NA
## 6           NA      NA           NA      NA       NA           NA

From wide to long for plotting

# Keep one column per measure of interest, stack them into key/value form
# (measure, data), drop the missing values, and record the year of each
# observation as a character column.
reefton_data_long <- reefton_data_df %>%
  select(Station, Date.local, Amount.mm, Tmin.C, Speed.ms) %>%
  gather(key = "measure", value = "data", -Station, -Date.local) %>%
  filter(!is.na(data)) %>%
  mutate(Year = format(Date.local, "%Y"))

Let's plot

# Default plot: all measures share a single panel, told apart by colour only
p_initial <- ggplot(reefton_data_long, aes(x = Date.local, y = data)) +
  geom_point(mapping = aes(colour = measure))
p_initial

Let's apply Cleveland's principles…

What things are superfluous?

Let's apply Cleveland's principles…

Do the graphical elements clearly show the data?

Let's apply Cleveland's principles…

Are the number of tick marks ok?

Let's apply Cleveland's principles…

Are overlapping plotting symbols visually distinguishable?

Let's apply Cleveland's principles…

Are superimposed datasets visually distinguishable?

How should we fix this graph?

Separate superimposed datasets

# One panel (row) per measure so the datasets are no longer superimposed
ggplot(reefton_data_long, aes(x = Date.local, y = data)) +
  geom_point(mapping = aes(colour = measure)) +
  facet_grid(measure ~ .)

Common y scale not needed

# Let every panel pick its own y range; the measures have different units
ggplot(reefton_data_long, aes(x = Date.local, y = data)) +
  geom_point(mapping = aes(colour = measure)) +
  facet_grid(measure ~ ., scales = "free_y")

Make overlapping data easier to distinguish

# Use shape = 1 so that overlapping observations remain visible.
# The y scale is freed per panel ("free_y"), matching the neighbouring slides;
# the x axis (date) stays shared across panels.
ggplot(reefton_data_long, aes(x = Date.local, y = data)) +
  geom_point(aes(colour = measure), shape = 1) + # show data overlap
  facet_grid(measure ~ ., scales = "free_y")

Remove superfluous legend

# The facet strips already name each measure, so the colour legend is dropped
ggplot(reefton_data_long, aes(x = Date.local, y = data)) +
  geom_point(aes(colour = measure), shape = 1) +
  facet_grid(measure ~ ., scales = "free_y") +
  theme(legend.position = "none")

Move labels to outside the y axis

# Move the facet strips to the left-hand side and place them outside the
# y axis
ggplot(reefton_data_long, aes(x = Date.local, y = data)) +
  geom_point(aes(colour = measure), shape = 1) +
  facet_grid(measure ~ ., scales = "free_y", switch = "y") + # label on left side
  theme(
    legend.position = "none",
    strip.placement = "outside" # label outside of y axis
  )

Label x and y axis meaningfully

# Blank the y-axis title (the strips already label each panel) and give the
# x axis a meaningful title
ggplot(reefton_data_long, aes(x = Date.local, y = data)) +
  geom_point(aes(colour = measure), shape = 1) +
  facet_grid(measure ~ ., scales = "free_y", switch = "y") +
  theme(legend.position = "none", strip.placement = "outside") +
  labs(x = "Date", y = "")

Remove extra shading

# Switch to the black-and-white theme to remove the grey shading.
# theme_bw() is deliberately added AFTER theme() here: it overrides the
# earlier theme() settings, which is the point this slide demonstrates.
ggplot(reefton_data_long, aes(x = Date.local, y = data)) +
  geom_point(aes(colour = measure), shape = 1) +
  facet_grid(measure ~ ., scales = "free_y", switch = "y") +
  labs(x = "Date", y = "") +
  theme(legend.position = "none", strip.placement = "outside") +
  theme_bw() # overrides previous theme

Remove extra shading

# Same plot with theme_bw() applied first, so the theme() tweaks that follow
# it are kept
ggplot(reefton_data_long, aes(x = Date.local, y = data)) +
  geom_point(aes(colour = measure), shape = 1) +
  facet_grid(measure ~ ., scales = "free_y", switch = "y") +
  labs(x = "Date", y = "") +
  theme_bw() +
  theme(
    legend.position = "none", # legend not needed
    strip.placement = "outside" # move strip to outside of y axis
  )

Change format of dates

# Store the plot as p1 (not printed here). The x aesthetic is converted with
# as.Date() so that the date scale can format the tick labels.
p1 <- ggplot(reefton_data_long, aes(x = as.Date(Date.local), y = data)) +
  geom_point(aes(colour = measure), shape = 1) +
  facet_grid(measure ~ ., scales = "free_y", switch = "y") +
  labs(x = "Date", y = "") +
  theme_bw() +
  theme(legend.position = "none", strip.placement = "outside") +
  scale_x_date(date_labels = "%m-%Y") # tick labels as month number-year

Change format of dates

Change format of dates

# Store the plot as p1 (not printed here), now labelling the date ticks with
# the full month name and year.
# xlab("Date") is included, as in the neighbouring versions of this plot;
# without it the x-axis title falls back to the expression
# "as.Date(Date.local)".
p1 <- ggplot(reefton_data_long, aes(x = as.Date(Date.local), y = data)) +
  geom_point(aes(colour = measure), shape = 1) +
  facet_grid(measure ~ ., scales = "free_y", switch = "y") +
  ylab("") +
  xlab("Date") +
  theme_bw() +
  theme(legend.position = "none", strip.placement = "outside") +
  scale_x_date(date_labels = "%B %Y") # format of date

Change format of dates

Changing the labels

# Relabel the measures with plotmath-style strings for the facet strips.
# No `levels` are given, so factor() uses the sorted unique values of
# `measure`, and the labels are matched to them in that order.
reefton_data_long[["measure"]] <- factor(
  x = reefton_data_long[["measure"]],
  labels = c(
    "Rainfall~amount~(mm)",
    "Wind~speed~(ms^-1)",
    "Min~daily~temp~(degree*C)"
  )
)

# Store the plot as p1 (not printed here); the facet strip text is parsed as
# math expressions via label_parsed.
p1 <- ggplot(reefton_data_long, aes(x = as.Date(Date.local), y = data)) +
  geom_point(aes(colour = measure), shape = 1) +
  facet_grid(
    measure ~ .,
    scales = "free_y",
    switch = "y",
    labeller = label_parsed # parses math expressions
  ) +
  labs(x = "Date", y = "") +
  theme_bw() +
  theme(legend.position = "none", strip.placement = "outside") +
  scale_x_date(date_labels = "%B %Y")

Changing the labels

Finally remove the box around the labels

# Final version, stored as p_final (not printed here): identical to the
# previous plot except that the strip background is blanked out.
p_final <- ggplot(reefton_data_long, aes(x = as.Date(Date.local), y = data)) +
  geom_point(aes(colour = measure), shape = 1) +
  facet_grid(
    measure ~ .,
    scales = "free_y",
    switch = "y",
    labeller = label_parsed
  ) +
  labs(x = "Date", y = "") +
  theme_bw() +
  theme(
    legend.position = "none",
    strip.placement = "outside",
    strip.background = element_blank() # no background
  ) +
  scale_x_date(date_labels = "%B %Y")

Finally remove the box around the labels

Compare back to the initial default graph